{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Naive Bayes classifier on the Pima diabetes dataset\n",
    "\n",
    "Fits a Gaussian Naive Bayes model that predicts the `Outcome` column of `pima_diabetes_dataset.csv` from all remaining columns, then reports accuracy on a held-out test split.\n",
    "\n",
    "The notebook is meant to be run top to bottom (Restart Kernel → Run All); the split is seeded so the reported accuracy is reproducible."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "from sklearn import metrics\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.naive_bayes import GaussianNB"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Everything a reader might want to tune lives here.\n",
    "DATA_PATH = 'pima_diabetes_dataset.csv'\n",
    "TARGET = 'Outcome'  # column to predict; every other column is used as a feature\n",
    "TEST_SIZE = 0.30    # fraction of rows held out for evaluation\n",
    "SEED = 42           # fixed so the train/test split is identical on every run"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "data = pd.read_csv(DATA_PATH)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "data.info()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Split features and target"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "X = data.drop(columns=[TARGET])\n",
    "y = data[TARGET]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# random_state makes the split reproducible; stratify keeps the class\n",
    "# proportions of y the same in the train and test partitions.\n",
    "X_train, X_test, Y_train, Y_test = train_test_split(\n",
    "    X, y, test_size=TEST_SIZE, random_state=SEED, stratify=y\n",
    ")\n",
    "\n",
    "# Sanity check: the two partitions together must account for every row.\n",
    "assert len(X_train) + len(X_test) == len(X)\n",
    "assert len(Y_train) + len(Y_test) == len(y)\n",
    "\n",
    "print(f\"Shape of X_test is {X_test.shape}\")\n",
    "print(f\"Shape of X_train is {X_train.shape}\")\n",
    "print(f\"Shape of Y_test is {Y_test.shape}\")\n",
    "print(f\"Shape of Y_train is {Y_train.shape}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Train model\n",
    "\n",
    "`GaussianNB` is used rather than `MultinomialNB`: the multinomial variant models each feature as a discrete count, whereas the features here are numeric measurements, some of them real-valued. `GaussianNB` models each feature with a per-class normal distribution, which is the Naive Bayes variant intended for continuous inputs."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "clf = GaussianNB()\n",
    "clf.fit(X_train, Y_train)\n",
    "Y_pred = clf.predict(X_test)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Evaluate\n",
    "\n",
    "Accuracy is the percentage of test rows whose predicted `Outcome` matches the true value."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(\"Accuracy\", metrics.accuracy_score(Y_test, Y_pred)*100)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.7"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}